In [1]:
import os
import Bio 
import re
import sys
from Bio import SeqIO
from Bio.Blast import NCBIXML
from Bio import Restriction 
from Bio.Restriction import *
from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio import SeqFeature
from Bio.SeqFeature import *

Make a sequence with some features

seqstr = "CCGGATGCA" 2 + "TTTTTTTTTATTTTCCGGTTTTTTTCCAGGTTTTTTTTTTTCTAG" 50 inputseq = SeqRecord(MutableSeq(seqstr, IUPACAmbiguousDNA()), id="testseq", description="testseq", name = "test") inputseq.features.append(SeqFeature(FeatureLocation(ExactPosition(2), ExactPosition(12)), type = "testTs1", strand = 1)) inputseq.features.append(SeqFeature(FeatureLocation(ExactPosition(2), ExactPosition(30)), type = "testfromAs", strand = -1)) inputseq.features.append(SeqFeature(FeatureLocation(ExactPosition(15), ExactPosition(80)), type = "test3", strand = -1))


In [125]:
def string2feat(queryseq, ampsdict): #lib5pr is subjectseq; t7 is queryseq
    '''
    This function accepts a query seq and a dictionary of subjectseqs, where the key (amp)
    is contained in a field in queryseq, highlighting the location of queryseq in it. 
    Returns a string.
    '''
    subjectseq = SeqRecord(ampsdict[queryseq[1][0]])
    #for seqrecord in subjectseq:
    locstart = queryseq[1][1]
    #print queryseq
    locend = queryseq[1][2]
    fwdlocs = []  
    revlocs = []
    # Figure out which strand the BLAST hit is on
    if locstart <= locend:
        fwdlocs.append(locstart)
    if locstart > locend:
        revlocs.append(locend)
    
    for item in fwdlocs:
        start = ExactPosition(int(item))
        end = ExactPosition(int((item) + len(queryseq[0].seq) + 1))
        location = FeatureLocation(start, end)
        feature = SeqFeature(location,type=str("cutsite_fwd"), strand = +1)
        subjectseq.features.append(feature)

    for item in revlocs:
        start = ExactPosition(int(item))
        end = ExactPosition(start + len(queryseq[0].seq))
        location = FeatureLocation(start, end)
        feature = SeqFeature(location,type=str("cutsite_rev"), strand = -1)
        subjectseq.features.append(feature)
    #print subjectseq.features
    return subjectseq

In [121]:
directlabel = 1
addpamcutters = 1

In [124]:
def print_features(inputseq):
    if addpamcutters == 1:
        cutline = list(" " * len(inputseq))
        HpaIIsites = HpaII.search(inputseq.seq)
        BfaIsites = BfaI.search(inputseq.seq)
        ScrFIsites = ScrFI.search(inputseq.seq)
        for cut in HpaIIsites:
            cutline[cut-1:cut + len("HpaII")] = "<HpaII"
        for cut in BfaIsites:
            cutline[cut-1:cut + len("BfaI")] = "<BfaI"
        for cut in ScrFIsites:
            cutline[cut-1:cut + len("ScrFI")] = "<ScrFI"
        cutline = "".join(cutline)
    
    
    mask = [list((("-" * 9) + "^" )* int(round(len(inputseq.seq)/10.0)))]
    newmaskline = list((("-" * 9) + "^" )* int(round(len(inputseq.seq)/10.0)))

    for feature in inputseq.features:
        # Make a new marker strand if any features overlap. All marker strands can be elements of a list.
        featstart = int(feature.location.start)
        featend = int(feature.location.end)
        if featstart > featend:
            print("Error! Feature end must be after feature start. Use strand to specify direction! Feature " + feature.type + \
                  " will not be displayed!")
        #if "<" in mask[-1][featstart:featend] or ">" in mask[-1][featstart:featend]:
            #mask.append(newmaskline)

        clean = 0
        for item in mask[-1][featstart:featend]:
            if item == "-":
                clean = 1
            elif item == "^":
                clean = 1
            else:
                clean = 0
                mask.append(newmaskline)
                break

        #print mask[-1][0:50]
        if feature.strand == 1:
            mask[-1] = mask[-1][:featstart-1] + [">"] * int(featend - featstart + 1) + mask[-1][featend:]
            if directlabel == 1:
                mask[-1] = mask[-1][:featstart] + list(str(feature.type)) + mask[-1][featstart + len(str(feature.type)):]
        if feature.strand == -1:
            mask[-1] = mask[-1][:featstart-1] + ["<"] * int(featend+1 - featstart) + mask[-1][featend:]
            if directlabel == 1:
                mask[-1] = mask[-1][:featstart+1] + list(str(feature.type)) + mask[-1][featstart+2 + len(str(feature.type)):]

    #if addpamcutters = 1:
        #cutline = list(" " * len(inputseq)
        #HpaIIsites = HpaII.search(inputseq.seq)



    for index, maskline in enumerate(mask):
        maskline = "".join(maskline)
        mask[index] = maskline
    # add labels
    if directlabel == 0:
        masklab = list(" " * (len(inputseq.seq)))
        for feature in inputseq.features:
            featstart = int(feature.location.start)
            featend = int(feature.location.end)
            featname = str(feature.type)
            masklab = masklab[:featstart] + list(str(feature.type)) + list(" " * (featend-1 - featstart - len(feature.type))) + masklab[featend-1:]
        masklab = "".join(masklab)

    lines = int(round(len(inputseq.seq) / 100)) + 1
    i = 0
    outstring = list(inputseq.name + "\n")
    #print inputseq.name
    outstring = []
    while i < lines:
        indexstart = i*100
        indexend = (i+1) * 100
        if indexend > len(inputseq.seq):
            indexend = len(inputseq.seq)
        if addpamcutters ==1:
            outstring.extend((str(indexstart + 1)) + "  " + cutline[indexstart:indexend] + "   " + str(indexend)+ "\n")
            #print (str(indexstart + 1)) + "  " + cutline[indexstart:indexend] + "   " + str(indexend)
        outstring.extend(str(indexstart+1) + "  " + inputseq.seq[indexstart:indexend] + "   " + str(indexend)+ "\n")
        #print str(indexstart+1) + "  " + inputseq.seq[indexstart:indexend] + "   " + str(indexend)
        for maskline in mask:
            outstring.extend((str(indexstart + 1)) + "  " + maskline[indexstart:indexend] + "   " + str(indexend)+ "\n")
            #print (str(indexstart + 1)) + "  " + maskline[indexstart:indexend] + "   " + str(indexend)
        if directlabel == 0:
            outstring.extend(str(indexstart +1) + "  " + masklab[indexstart:indexend] + "   " + str(indexend)+ "\n")
            #print str(indexstart +1) + "  " + masklab[indexstart:indexend] + "   " + str(indexend)
        outstring.extend("\n")
        #print "\n"
        i = i + 1
    outstring = "".join(outstring)
    return outstring

clean = 0 for item in mask[0][0:9]: if item == "-": print item clean = 1 elif item == "^": print item clean = 1 else: clean = 0 break print a